/* kxTok - quick little tokenizer for stuff first
 * loaded into memory.  Originally developed for
 * "Key eXpression" evaluator.
 *
 * This file is copyright 2002 Jim Kent, but license is hereby
 * granted for all use - public, private or commercial. */

#include "common.h"
#include "kxTok.h"

/* When TRUE, tokens produced from quoted text keep their surrounding quote
 * characters; when FALSE (the initial value) the quotes are left out of the
 * token string.  Read by kxTokenizeFancy() and set by kxTokIncludeQuotes(). */
boolean includeQuotes = FALSE;

static struct kxTok *kxTokNew(enum kxTokType type, char *string, int stringSize,
                              boolean spaceBefore)
/* Create a token of the given type whose text is the first 'stringSize'
 * bytes found at 'string'.  'spaceBefore' is stored in the token as is.
 * The token and its text live in one needMem() allocation sized as the
 * struct plus 'stringSize' extra bytes.
 * NOTE(review): no terminator is written here; this presumably relies on
 * needMem() returning zeroed memory - confirm. */
{
  struct kxTok *newTok;
  int byteCount = sizeof(*newTok) + stringSize;

  newTok = needMem(byteCount);
  memcpy(newTok->string, string, stringSize);
  newTok->spaceBefore = spaceBefore;
  newTok->type = type;
  return newTok;
}

static char *kxTokSkipQuoted(char *s, char quote, boolean wildPercent,
                             enum kxTokType *retType)
/* Scan the body of a quoted string.  's' points just past the opening
 * quote and 'quote' is the quote character to look for.  Returns a pointer
 * just past the closing quote.  *retType is set to kxtWildString if the
 * body contains '*', '?' or (when wildPercent is TRUE) '%', and to
 * kxtString otherwise.  Calls errAbort if the text ends before the closing
 * quote is found, instead of scanning beyond the terminating zero. */
{
  enum kxTokType type = kxtString;
  char c;

  for (;;) {
    c = *s++;
    if (c == quote)
      break;
    if (c == 0)
      errAbort("Missing closing %c in quoted string", quote);
    if (c == '*' || c == '?' || (wildPercent && c == '%'))
      type = kxtWildString;
  }
  *retType = type;
  return s;
}

struct kxTok *kxTokenizeFancy(char *text, boolean wildAst, boolean wildPercent,
                              boolean includeHyphen)
/* Convert text to stream of tokens. If 'wildAst' is
 * TRUE then '*' character will be treated as wildcard
 * rather than multiplication sign.
 * If wildPercent is TRUE then the '%' character will be treated as a
 * wildcard (as in SQL) rather than a modulo (kxtMod) or percent sign.
 * If includeHyphen is TRUE then a '-' character in the middle of a String
 * token will be treated as a hyphen (part of the String token) instead of
 * a new kxtSub token.
 * Text inside single or double quotes becomes one String token (or
 * WildString if it holds '*', '?', or '%' with wildPercent); the quote
 * characters are kept in the token only if kxTokIncludeQuotes(TRUE) was
 * called.  The returned list always ends with a kxtEnd token.
 * Calls errAbort on a character that fits no token class and on a quoted
 * string that is missing its closing quote. */
{
  struct kxTok *tokList = NULL, *tok;
  char c, *s, *start = NULL, *end = NULL;
  enum kxTokType type = 0;
  boolean spaceBefore = FALSE;

  s = text;
  for (;;) {
    if ((c = *s) == 0)
      break;
    start = s++;
    /* The <ctype.h> classifiers need an unsigned char value; passing a
     * negative plain char is undefined behavior. */
    if (isspace((unsigned char)c)) {
      /* White space makes no token; it is only noted on the next one. */
      spaceBefore = TRUE;
      continue;
    } else if (isalnum((unsigned char)c) || c == '?' ||
               (wildAst && c == '*') || (wildPercent && c == '%')) {
      /* A word.  It starts out wild only if the first character is one of
       * the wildcard characters that let us into this branch. */
      type = isalnum((unsigned char)c) ? kxtString : kxtWildString;
      for (;;) {
        c = *s;
        if (isalnum((unsigned char)c) || c == ':' || c == '_' || c == '.' ||
            (includeHyphen && c == '-'))
          ++s;
        else if (c == '?' || (wildAst && c == '*') ||
                 (wildPercent && c == '%')) {
          type = kxtWildString;
          ++s;
        } else
          break;
      }
      end = s;
    } else if (c == '"' || c == '\'') {
      /* Quoted string, closed by the same kind of quote that opened it. */
      if (!includeQuotes)
        start = s; /* Token text begins after the opening quote. */
      s = kxTokSkipQuoted(s, c, wildPercent, &type);
      /* s is now just past the closing quote; leave that quote out of the
       * token unless quotes were asked for. */
      end = includeQuotes ? s : s - 1;
    } else if (c == '=') {
      type = kxtEquals;
      end = s;
    } else if (c == '&') {
      type = kxtAnd;
      end = s;
    } else if (c == '|') {
      type = kxtOr;
      end = s;
    } else if (c == '^') {
      type = kxtXor;
      end = s;
    } else if (c == '+') {
      type = kxtAdd;
      end = s;
    } else if (c == '-') {
      type = kxtSub;
      end = s;
    } else if (c == '*') {
      /* Only reached when wildAst is FALSE. */
      type = kxtMul;
      end = s;
    } else if (c == '/') {
      type = kxtDiv;
      end = s;
    } else if (c == '(') {
      type = kxtOpenParen;
      end = s;
    } else if (c == ')') {
      type = kxtCloseParen;
      end = s;
    } else if (c == '!') {
      type = kxtNot;
      end = s;
    } else if (c == '>') {
      if (*s == '=') {
        ++s;
        type = kxtGE;
      } else
        type = kxtGT;
      end = s;
    } else if (c == '<') {
      if (*s == '=') {
        ++s;
        type = kxtLE;
      } else
        type = kxtLT;
      end = s;
    } else if (c == '.') {
      type = kxtDot;
      end = s;
    } else if (c == '%') {
      /* Only reached when wildPercent is FALSE. */
      type = kxtMod;
      end = s;
    } else if (ispunct((unsigned char)c)) {
      type = kxtPunct;
      end = s;
    } else {
      errAbort("Unrecognized character %c", c);
    }
    tok = kxTokNew(type, start, end - start, spaceBefore);
    slAddHead(&tokList, tok);
    spaceBefore = FALSE;
  }
  /* Tokens were pushed on the head of the list, so add the end marker the
   * same way and then reverse to get input order. */
  tok = kxTokNew(kxtEnd, "end", 3, spaceBefore);
  slAddHead(&tokList, tok);
  slReverse(&tokList);
  return tokList;
}

struct kxTok *kxTokenize(char *text, boolean wildAst)
/* Tokenize text with the plain settings: this is kxTokenizeFancy() with
 * the wildPercent and includeHyphen options both turned off.  'wildAst'
 * is passed straight through; when TRUE a '*' is a wildcard rather than
 * a multiplication sign. */
{
  struct kxTok *tokList;

  tokList = kxTokenizeFancy(text, wildAst, FALSE, FALSE);
  return tokList;
}

void kxTokIncludeQuotes(boolean val)
/* Pass in TRUE if kxTok should include quote characters in string tokens,
 * FALSE to leave them out.  This stores 'val' in the file-level
 * includeQuotes flag, so it applies to every kxTokenizeFancy() call made
 * after this one until the flag is changed again. */
{
  includeQuotes = val;
}
